load packages
library(ggplot2)
library(data.table)
library(stringr)
library(dplyr)
library(tidyr)
library(rprojroot)
library(vegan)
library(ggpubr)
library(rstatix)
library(cowplot)
library(wesanderson)
#library(BiocManager)
#library(ComplexHeatmap)
#library(ggridges)
set paths and filenames
### files
long_table=sprintf("%s/data/mp142_TGVG1.1_MPA4_combined_abundance_table_longform1.tsv", find_rstudio_root_file())
metadata_table=sprintf("%s/data/some_teddy_MP142_metadata2.all_samples1.delivery.csv", find_rstudio_root_file())
iphop_table=sprintf("%s/data/TGVG_database_v1.1.exemplars.iphop_genus_outputs1.filt.csv", find_rstudio_root_file())
taxonomy_table=sprintf("%s/data/TGVG_database_v1.1.VC_taxonomy_table.csv", find_rstudio_root_file())
load long table and metadata, merge
long_dt <- fread(sprintf("%s", long_table), sep = "\t", header = T) %>%
select(sampleID, rel_abundance, lineage) %>%
mutate(sampleID = as.character(sampleID),
kingdom = case_when(grepl("k__Bac", lineage) ~ "Bacteria",
grepl("k__Vir", lineage) ~ "Virus",
grepl("k__Ar", lineage) ~ "Archea",
grepl("k__Euk", lineage) ~ "Eukaryota",
TRUE ~ "other"))
meta_dt <- fread(sprintf("%s", metadata_table), sep = ",", header = T) %>%
select(-V1) %>%
mutate(sample = as.character(sample)) %>%
group_by(mask_id) %>%
filter(n() > 30) %>%
ungroup()
merge_dt <- merge(long_dt, meta_dt, by.x = "sampleID", by.y ="sample")
merge_dt %>%
group_by(mask_id) %>%
filter(grepl("Faecalibacterium_prausnitzii", lineage)) %>%
summarize(f_praus = n_distinct(sampleID)) %>%
arrange(desc(f_praus))
load iphop host prediction table
iphop_dt <- fread(sprintf("%s", iphop_table), sep = ",", header = T, col.names = c("name", "AAI", "host_lineage", "confidence", "methods")) %>%
mutate(host_genus = gsub(".*g__","g__", host_lineage),
host_family = gsub(";g__.*","", host_lineage))
tax_dt <- fread(sprintf("%s", taxonomy_table), sep = ",", header = T) %>%
select(c(name, Species))
iphop_tax_dt <- merge(iphop_dt, tax_dt, by = "name") %>%
mutate(species = gsub("s__", "", Species))
look at subject 248418
sub248418_dt <- merge_dt %>%
filter(mask_id == 248418) %>%
mutate(species = gsub(".*\\|s__", "", lineage))
sub248418_dt %>%
select(lineage, species, age_days, sampleID, rel_abundance) %>%
complete(species, age_days) %>%
filter(grepl("g__Faecalibacterium", lineage)) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
group_by(species) %>%
mutate(avg_age = mean(age_days),
species = gsub("_", " ",species),
species = gsub("\\|t", "",species)) %>%
ggplot(aes(x = age_days, y = rel_abundance,color = reorder(species, avg_age), fill = reorder(species, avg_age))) +
geom_col() +
geom_line() +
scale_fill_brewer(palette = "Dark2") +
scale_color_brewer(palette = "Dark2") +
facet_wrap(vars(reorder(species, avg_age)),
ncol = 1, strip.position = "right",
labeller = label_wrap_gen(width = 13)) +
theme_linedraw() +
theme(legend.position = "Off",
strip.text.y.right = element_text(angle = 0))
ggsave(file = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub248418_faecali_bacteria1.pdf", width = 8, height = 2.8)

pal = wes_palette("FantasticFox1", 23, type = "continuous")
merge(sub248418_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Faecalibacterium") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup() %>%
ggplot(aes(x = age_days, y = rel_abundance,color = reorder(species, avg_age), fill = reorder(species, avg_age))) +
geom_col() +
geom_line() +
scale_fill_manual(values = pal) +
scale_color_manual(values = pal) +
facet_wrap(vars(reorder(species, avg_age)),
ncol = 1, strip.position = "right",
labeller = label_wrap_gen(width = 13)) +
theme_bw() +
theme(legend.position = "Off",
strip.text.y.right = element_text(angle = 0))
ggsave(file = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub248418_faecali_phage1.pdf", width = 8, height = 9.5)

complex heatmap
bac_fp_sub248418 <- sub248418_dt %>%
select(lineage, species, age_days, sampleID, rel_abundance) %>%
complete(species, age_days) %>%
filter(grepl("g__Faecalibacterium", lineage)) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
group_by(species) %>%
mutate(avg_age = mean(age_days),
species = gsub("_", " ",species),
species = gsub("\\|t", "",species)) %>%
ungroup()
bac_fp_sub248418$kingdom <- "Bacteria"
phage_fp_sub248418 <- merge(sub248418_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Faecalibacterium") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup()
phage_fp_sub248418$kingdom <- "Virus"
sub248418_fp_all_dt <- rbind(bac_fp_sub248418, phage_fp_sub248418)
tempp <- sub248418_fp_all_dt %>%
mutate(species = gsub("Faecalibacterium", "F", species)) %>%
ggplot(aes(x = factor(age_days), y = reorder(species, desc(avg_age)),
color = kingdom, size = rel_abundance)) +
geom_point(alpha = 0.8) +
scale_color_manual(values = c("orangered", "cadetblue")) +
scale_size(name = "rel.\nabundance") +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "SGB", x = "day of life") +
theme(strip.text.y.right = element_text(angle = 0),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "left")
detectp <- sub248418_fp_all_dt %>%
group_by(species, kingdom, avg_age) %>%
summarize(detected = n()) %>%
ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
geom_col(color = "grey10") +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "", x = "# times\ndetected") +
theme(strip.text.y.right = element_blank(),
strip.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_blank())
`summarise()` has grouped output by 'species', 'kingdom'. You can override using the `.groups` argument.
combp <- plot_grid(tempp, detectp, align = "h", axis = "b", rel_widths = c(20, 3))
combp


subject 202376

pal = wes_palette("FantasticFox1", 15, type = "continuous")
merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Faecalibacterium") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup() %>%
ggplot(aes(x = age_days, y = rel_abundance,color = reorder(species, avg_age), fill = reorder(species, avg_age))) +
geom_col() +
geom_line() +
scale_fill_manual(values = pal) +
scale_color_manual(values = pal) +
facet_wrap(vars(reorder(species, avg_age)),
ncol = 1, strip.position = "right",
labeller = label_wrap_gen(width = 13)) +
xlim(c(0,2100)) +
theme_bw() +
theme(legend.position = "Off",
strip.text.y.right = element_text(angle = 0))
ggsave(file = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_faecali_phage1.pdf", width = 8, height = 9.5)

sub202376_dt %>%
select(lineage, species, age_days, sampleID, rel_abundance) %>%
complete(species, age_days) %>%
filter(grepl("g__Dialister", lineage)) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
group_by(species) %>%
mutate(avg_age = mean(age_days),
species = gsub("_", " ",species),
species = gsub("\\|t", "",species)) %>%
ggplot(aes(x = age_days, y = rel_abundance,color = reorder(species, avg_age), fill = reorder(species, avg_age))) +
geom_col() +
geom_line() +
scale_fill_brewer(palette = "Dark2") +
scale_color_brewer(palette = "Dark2") +
facet_wrap(vars(reorder(species, avg_age)),
ncol = 1, strip.position = "right",
labeller = label_wrap_gen(width = 13)) +
theme_linedraw() +
xlim(c(0,2100)) +
theme(legend.position = "Off",
strip.text.y.right = element_text(angle = 0))

#ggsave(file = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_dialister_bacteria1.pdf", width = 8, height = 2.8)
pal = wes_palette("FantasticFox1", 15, type = "continuous")
merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Dialister") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup() %>%
ggplot(aes(x = age_days, y = rel_abundance,color = reorder(species, avg_age), fill = reorder(species, avg_age))) +
geom_col() +
geom_line() +
scale_fill_manual(values = pal) +
scale_color_manual(values = pal) +
facet_wrap(vars(reorder(species, avg_age)),
ncol = 1, strip.position = "right",
labeller = label_wrap_gen(width = 13)) +
xlim(c(0,2100)) +
theme_bw() +
theme(legend.position = "Off",
strip.text.y.right = element_text(angle = 0))
ggsave(file = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_dialister_phage1.pdf", width = 8, height = 9.5)

Alistipes


Roseburia

pal = wes_palette("FantasticFox1", 15, type = "continuous")
merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Roseburia") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup() %>%
ggplot(aes(x = age_days, y = rel_abundance,color = reorder(species, avg_age), fill = reorder(species, avg_age))) +
geom_col() +
geom_line() +
scale_fill_manual(values = pal) +
scale_color_manual(values = pal) +
facet_wrap(vars(reorder(species, avg_age)),
ncol = 1, strip.position = "right",
labeller = label_wrap_gen(width = 13)) +
xlim(c(0,2100)) +
theme_bw() +
theme(legend.position = "Off",
strip.text.y.right = element_text(angle = 0))
ggsave(file = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_Roseburia_phage1.pdf", width = 8, height = 9.5)
Faecalibacterium 202376 tilemap
bac_fp_sub202376 <- sub202376_dt %>%
select(lineage, species, age_days, sampleID, rel_abundance) %>%
complete(species, age_days) %>%
filter(grepl("g__Faecalibacterium", lineage)) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
group_by(species) %>%
mutate(avg_age = mean(age_days),
species = gsub("_", " ",species),
species = gsub("\\|t", "",species)) %>%
ungroup()
bac_fp_sub202376$kingdom <- "Bacteria"
phage_fp_sub202376 <- merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Faecalibacterium") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup()
phage_fp_sub202376$kingdom <- "Virus"
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)
tempp <- sub202376_fp_all_dt %>%
mutate(species = gsub("Faecalibacterium", "F", species)) %>%
ggplot(aes(x = factor(age_days), y = reorder(species, desc(avg_age)),
color = kingdom, size = rel_abundance)) +
geom_point(alpha = 0.8) +
scale_color_manual(values = c("orangered", "cadetblue")) +
scale_size(name = "rel.\nabundance") +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "SGB", x = "day of life") +
theme(strip.text.y.right = element_text(angle = 0),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "left")
detectp <- sub202376_fp_all_dt %>%
group_by(species, kingdom, avg_age) %>%
summarize(detected = n()) %>%
ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
geom_col() +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "", x = "# times\ndetected") +
theme(strip.text.y.right = element_blank(),
strip.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_blank())
`summarise()` has grouped output by 'species', 'kingdom'. You can override using the `.groups` argument.
combp <- plot_grid(tempp, detectp, align = "h", axis = "b", rel_widths = c(20, 3))
combp

check most prevalent bacteria with few other SGBs in genus
sub202376_dt %>%
filter(kingdom == "Bacteria") %>%
mutate(genus = gsub("\\|s__.*", "", lineage),
genus = gsub(".*\\|g__", "", genus)) %>%
group_by(species) %>%
mutate(detected = n()) %>%
ungroup() %>%
group_by(genus) %>%
mutate(distinct_g = n_distinct(species)) %>%
ungroup() %>%
distinct(genus, species, detected, distinct_g) %>%
arrange(distinct_g, desc(detected))
Flavonifractor 202376 tilemap
bac_fp_sub202376 <- sub202376_dt %>%
select(lineage, species, age_days, sampleID, rel_abundance) %>%
complete(species, age_days) %>%
filter(grepl("g__Flavonifractor", lineage)) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
group_by(species) %>%
mutate(avg_age = mean(age_days),
species = gsub("_", " ",species),
species = gsub("\\|t", "",species)) %>%
ungroup()
bac_fp_sub202376$kingdom <- "Bacteria"
phage_fp_sub202376 <- merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Flavonifractor") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup()
phage_fp_sub202376$kingdom <- "Virus"
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)
tempp <- sub202376_fp_all_dt %>%
mutate(species = gsub("Flavonifractor", "F", species)) %>%
ggplot(aes(x = factor(age_days), y = reorder(species, desc(avg_age)),
color = kingdom, size = rel_abundance)) +
geom_point(alpha = 0.8) +
scale_color_manual(values = c("orangered", "cadetblue")) +
scale_size(name = "rel.\nabundance") +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "SGB", x = "day of life") +
theme(strip.text.y.right = element_text(angle = 0),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "left")
detectp <- sub202376_fp_all_dt %>%
group_by(species, kingdom, avg_age) %>%
summarize(detected = n()) %>%
ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
geom_col() +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "", x = "# times\ndetected") +
theme(strip.text.y.right = element_blank(),
strip.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_blank())
`summarise()` has grouped output by 'species', 'kingdom'. You can override using the `.groups` argument.
combp <- plot_grid(tempp, detectp, align = "h", axis = "b", rel_widths = c(20, 3))
combp

Escherichia 202376 tilemap
bac_fp_sub202376 <- sub202376_dt %>%
select(lineage, species, age_days, sampleID, rel_abundance) %>%
complete(species, age_days) %>%
filter(grepl("g__Escherichia", lineage)) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
group_by(species) %>%
mutate(avg_age = mean(age_days),
species = gsub("_", " ",species),
species = gsub("\\|t", "",species)) %>%
ungroup()
bac_fp_sub202376$kingdom <- "Bacteria"
phage_fp_sub202376 <- merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Escherichia") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup()
phage_fp_sub202376$kingdom <- "Virus"
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)
tempp <- sub202376_fp_all_dt %>%
mutate(species = gsub("Escherichia", "E", species)) %>%
ggplot(aes(x = factor(age_days), y = reorder(species, desc(avg_age)),
color = kingdom, size = rel_abundance)) +
geom_point(alpha = 0.8) +
scale_color_manual(values = c("orangered", "cadetblue")) +
scale_size(name = "rel.\nabundance") +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "SGB", x = "day of life") +
theme(strip.text.y.right = element_text(angle = 0),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "left")
detectp <- sub202376_fp_all_dt %>%
group_by(species, kingdom, avg_age) %>%
summarize(detected = n()) %>%
ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
geom_col() +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "", x = "# times\ndetected") +
theme(strip.text.y.right = element_blank(),
strip.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_blank())
`summarise()` has grouped output by 'species', 'kingdom'. You can override using the `.groups` argument.
combp <- plot_grid(tempp, detectp, align = "h", axis = "b", rel_widths = c(20, 3))
combp

Barnesiella 202376 tilemap
bac_fp_sub202376 <- sub202376_dt %>%
select(lineage, species, age_days, sampleID, rel_abundance) %>%
complete(species, age_days) %>%
filter(grepl("g__Barnesiella", lineage)) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
group_by(species) %>%
mutate(avg_age = mean(age_days),
species = gsub("_", " ",species),
species = gsub("\\|t", "",species)) %>%
ungroup()
bac_fp_sub202376$kingdom <- "Bacteria"
phage_fp_sub202376 <- merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Barnesiella") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup()
phage_fp_sub202376$kingdom <- "Virus"
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)
tempp <- sub202376_fp_all_dt %>%
mutate(species = gsub("Barnesiella", "B", species)) %>%
ggplot(aes(x = factor(age_days), y = reorder(species, desc(avg_age)),
color = kingdom, size = rel_abundance)) +
geom_point(alpha = 0.8) +
scale_color_manual(values = c("orangered", "cadetblue")) +
scale_size(name = "rel.\nabundance") +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "SGB", x = "day of life") +
theme(strip.text.y.right = element_text(angle = 0),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "left")
detectp <- sub202376_fp_all_dt %>%
group_by(species, kingdom, avg_age) %>%
summarize(detected = n()) %>%
ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
geom_col() +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "", x = "# times\ndetected") +
theme(strip.text.y.right = element_blank(),
strip.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_blank())
`summarise()` has grouped output by 'species', 'kingdom'. You can override using the `.groups` argument.
combp <- plot_grid(tempp, detectp, align = "h", axis = "b", rel_widths = c(20, 3))
combp

Parabacteroides 202376 tilemap
bac_fp_sub202376 <- sub202376_dt %>%
select(lineage, species, age_days, sampleID, rel_abundance) %>%
complete(species, age_days) %>%
filter(grepl("g__Parabacteroides", lineage)) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
group_by(species) %>%
mutate(avg_age = mean(age_days),
species = gsub("_", " ",species),
species = gsub("\\|t", "",species)) %>%
ungroup()
bac_fp_sub202376$kingdom <- "Bacteria"
phage_fp_sub202376 <- merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
filter(host_genus == "g__Parabacteroides") %>%
group_by(species) %>%
mutate(avg_age = mean(age_days)) %>%
complete(age_days) %>%
select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
ungroup()
phage_fp_sub202376$kingdom <- "Virus"
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)
tempp <- sub202376_fp_all_dt %>%
mutate(species = gsub("Parabacteroides", "P", species)) %>%
ggplot(aes(x = factor(age_days), y = reorder(species, desc(avg_age)),
color = kingdom, size = rel_abundance)) +
geom_point(alpha = 0.8) +
scale_color_manual(values = c("orangered", "cadetblue")) +
scale_size(name = "rel.\nabundance") +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "SGB", x = "day of life") +
theme(strip.text.y.right = element_text(angle = 0),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
legend.position = "left")
detectp <- sub202376_fp_all_dt %>%
group_by(species, kingdom, avg_age) %>%
summarize(detected = n()) %>%
ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
geom_col() +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "", x = "# times\ndetected") +
theme(strip.text.y.right = element_blank(),
strip.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_blank())
`summarise()` has grouped output by 'species', 'kingdom'. You can override using the `.groups` argument.
combp <- plot_grid(tempp, detectp, align = "h", axis = "b", rel_widths = c(20, 3))
combp

Blautia 202376 tilemap
# Blautia SGB rows for subject 202376, labelled as bacteria.
bac_fp_sub202376 <- sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Blautia", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    species = gsub("_", " ", species),
    species = gsub("\\|t", "", species)
  ) %>%
  ungroup()
bac_fp_sub202376$kingdom <- "Bacteria"

# Viral rows whose predicted host genus is Blautia, labelled as virus.
phage_fp_sub202376 <- merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Blautia") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup()
phage_fp_sub202376$kingdom <- "Virus"

sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)

# Bubble plot: one column per sampling day, one row per SGB, point size =
# relative abundance, bacteria and phage in separate facets.
tempp <- sub202376_fp_all_dt %>%
  # Abbreviate the genus this chunk actually plots. The earlier pattern
  # ("Veillonella") matched nothing in Blautia-filtered rows.
  mutate(species = gsub("Blautia", "B", species)) %>%
  ggplot(aes(
    x = factor(age_days), y = reorder(species, desc(avg_age)),
    color = kingdom, size = rel_abundance
  )) +
  geom_point(alpha = 0.8) +
  scale_color_manual(values = c("orangered", "cadetblue")) +
  scale_size(name = "rel.\nabundance") +
  facet_grid(vars(kingdom), scales = "free_y", space = "free") +
  theme_bw() +
  labs(y = "SGB", x = "day of life") +
  theme(
    strip.text.y.right = element_text(angle = 0),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "left"
  )
detectp <- sub202376_fp_all_dt %>%
group_by(species, kingdom, avg_age) %>%
summarize(detected = n()) %>%
ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
geom_col() +
facet_grid(vars(kingdom), scales = "free_y", space="free") +
theme_bw() +
labs(y = "", x = "# times\ndetected") +
theme(strip.text.y.right = element_blank(),
strip.background = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_blank())
`summarise()` has grouped output by 'species', 'kingdom'. You can override using the `.groups` argument.
combp <- plot_grid(tempp, detectp, align = "h", axis = "b", rel_widths = c(20, 3))
combp

---
title: "longterm bacteria and their phages"
output: html_notebook
---
load packages

```{r}
library(ggplot2)
library(data.table)
library(stringr)
library(dplyr)
library(tidyr)
library(rprojroot)
library(vegan)
library(ggpubr)
library(rstatix)
library(cowplot)
library(wesanderson)
#library(BiocManager)
#library(ComplexHeatmap)
#library(ggridges)
```


set paths and filenames

```{r}
### files
# Locate the RStudio project root once and build every input path beneath its
# data/ directory.
project_root <- find_rstudio_root_file()

# long-form abundance table (read below as tab-separated)
long_table <- file.path(
  project_root, "data",
  "mp142_TGVG1.1_MPA4_combined_abundance_table_longform1.tsv"
)

# sample metadata (read below as comma-separated)
metadata_table <- file.path(
  project_root, "data",
  "some_teddy_MP142_metadata2.all_samples1.delivery.csv"
)

# iPHoP genus-level host predictions
iphop_table <- file.path(
  project_root, "data",
  "TGVG_database_v1.1.exemplars.iphop_genus_outputs1.filt.csv"
)

# taxonomy table providing the Species column
taxonomy_table <- file.path(
  project_root, "data",
  "TGVG_database_v1.1.VC_taxonomy_table.csv"
)

```

load long table and metadata, merge

```{r}
# Long-form abundance table: keep the three columns used downstream and tag
# every row with a kingdom label derived from its lineage string.
# NOTE(review): "Archea" is a misspelling of "Archaea"; the label is left
# unchanged here because code outside this chunk may match on it.
long_dt <- fread(long_table, sep = "\t", header = TRUE) %>%
  select(sampleID, rel_abundance, lineage) %>%
  mutate(
    sampleID = as.character(sampleID),
    kingdom = case_when(
      grepl("k__Bac", lineage) ~ "Bacteria",
      grepl("k__Vir", lineage) ~ "Virus",
      grepl("k__Ar", lineage) ~ "Archea",
      grepl("k__Euk", lineage) ~ "Eukaryota",
      TRUE ~ "other"
    )
  )

# Sample metadata: drop the V1 column (presumably an unnamed row-index column
# in the CSV - confirm) and keep only subjects (mask_id) that have more than
# 30 metadata rows.
meta_dt <- fread(metadata_table, sep = ",", header = TRUE) %>%
  select(-V1) %>%
  mutate(sample = as.character(sample)) %>%
  group_by(mask_id) %>%
  filter(n() > 30) %>%
  ungroup()

# Join abundances to metadata on sample ID. merge() keeps only matching rows
# by default, so samples from filtered-out subjects are dropped here.
merge_dt <- merge(long_dt, meta_dt, by.x = "sampleID", by.y = "sample")

```


```{r}
# For each subject, count the distinct samples that contain a
# Faecalibacterium prausnitzii lineage; subjects with the most samples first.
merge_dt %>%
  filter(grepl("Faecalibacterium_prausnitzii", lineage)) %>%
  group_by(mask_id) %>%
  summarize(f_praus = n_distinct(sampleID)) %>%
  arrange(desc(f_praus))
```

load iphop host prediction table
```{r}
# iPHoP host predictions. The file's own header row is read and then replaced
# by the column names given here.
iphop_dt <- fread(
  iphop_table,
  sep = ",",
  header = TRUE,
  col.names = c("name", "AAI", "host_lineage", "confidence", "methods")
) %>%
  mutate(
    # everything from the last "g__" onward (the leading ".*" is greedy)
    host_genus = gsub(".*g__", "g__", host_lineage),
    # the host lineage with the ";g__..." suffix removed
    host_family = gsub(";g__.*", "", host_lineage)
  )

# Taxonomy table: only the identifier and the Species column are needed.
tax_dt <- fread(taxonomy_table, sep = ",", header = TRUE) %>%
  select(c(name, Species))

# Attach species names to the host predictions and strip the "s__" prefix so
# `species` can be joined against the species labels of the abundance tables.
iphop_tax_dt <- merge(iphop_dt, tax_dt, by = "name") %>%
  mutate(species = gsub("s__", "", Species))
```


look at subject 248418
```{r}
# Restrict the merged table to subject 248418 and derive a species label by
# stripping everything up to and including "|s__" from the lineage.
sub248418_dt <- merge_dt %>%
  filter(mask_id == 248418) %>%
  mutate(species = gsub(".*\\|s__", "", lineage))

# Faecalibacterium SGBs over time: one facet per species, ordered by the mean
# age (in days) of the rows in which the species appears.
# NOTE(review): rows added by complete() have NA lineage, so the grepl()
# filter that follows appears to drop them again and the zero-fill then has
# nothing to fill - confirm whether explicit zeros were intended.
sub248418_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Faecalibacterium", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    # prettify labels: underscores to spaces, then remove the "|t" marker
    species = gsub("_", " ", species),
    species = gsub("\\|t", "", species)
  ) %>%
  ungroup() %>%
  ggplot(aes(
    x = age_days, y = rel_abundance,
    color = reorder(species, avg_age),
    fill = reorder(species, avg_age)
  )) +
  geom_col() +
  geom_line() +
  scale_fill_brewer(palette = "Dark2") +
  scale_color_brewer(palette = "Dark2") +
  facet_wrap(
    vars(reorder(species, avg_age)),
    ncol = 1, strip.position = "right",
    labeller = label_wrap_gen(width = 13)
  ) +
  theme_linedraw() +
  theme(
    # "none" is the documented value for hiding the legend; the previous
    # "Off" is not a recognised legend position.
    legend.position = "none",
    strip.text.y.right = element_text(angle = 0)
  )

# Saves the most recent plot; `filename` is spelled out instead of relying on
# partial matching of `file`.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub248418_faecali_bacteria1.pdf", width = 8, height = 2.8)

```

```{r}

# 23 interpolated colours, used for both fill and outline of the facets below.
pal <- wes_palette("FantasticFox1", 23, type = "continuous")

# Viruses in subject 248418 whose predicted host genus is Faecalibacterium:
# one facet per viral species, ordered by mean age of detection.
# NOTE(review): complete() runs on data grouped by species, so it only sees
# the age_days values already present for that species and appears to add no
# new rows - confirm whether zero-filling across all sampling days was
# intended.
merge(sub248418_dt, iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Faecalibacterium") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  ggplot(aes(
    x = age_days, y = rel_abundance,
    color = reorder(species, avg_age),
    fill = reorder(species, avg_age)
  )) +
  geom_col() +
  geom_line() +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  facet_wrap(
    vars(reorder(species, avg_age)),
    ncol = 1, strip.position = "right",
    labeller = label_wrap_gen(width = 13)
  ) +
  theme_bw() +
  theme(
    # "none" is the documented value for hiding the legend; the previous
    # "Off" is not a recognised legend position.
    legend.position = "none",
    strip.text.y.right = element_text(angle = 0)
  )

# Saves the most recent plot; `filename` is spelled out instead of relying on
# partial matching of `file`.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub248418_faecali_phage1.pdf", width = 8, height = 9.5)

```

complex heatmap
```{r}
# Bacterial half: Faecalibacterium lineages of subject 248418 with prettified
# species labels, tagged with kingdom "Bacteria".
bac_fp_sub248418 <- sub248418_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Faecalibacterium", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    species = gsub("_", " ", species),
    species = gsub("\\|t", "", species)
  ) %>%
  ungroup() %>%
  mutate(kingdom = "Bacteria")

# Viral half: species joined to host predictions, kept when the predicted
# host genus is Faecalibacterium, tagged with kingdom "Virus".
phage_fp_sub248418 <- sub248418_dt %>%
  merge(iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Faecalibacterium") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  mutate(kingdom = "Virus")

# Stack both halves into the table used by the plots that follow.
sub248418_fp_all_dt <- rbind(bac_fp_sub248418, phage_fp_sub248418)

```


```{r}
# Tile view: sampling day (as a discrete axis) by species, filled by relative
# abundance, bacteria and phage in separate facets.
sub248418_fp_all_dt %>%
  ggplot(aes(
    x = factor(age_days), y = reorder(species, avg_age),
    fill = rel_abundance
  )) +
  geom_tile() +
  scale_fill_gradient(low = "cadetblue", high = "black") +
  #facet_wrap(vars(kingdom), ncol = 1, scales = "free_y") +
  facet_grid(vars(kingdom), scales = "free_y", space = "free") +
  theme_bw() +
  theme(
    strip.text.y.right = element_text(angle = 0),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )

# Bubble view of the same table: point size encodes relative abundance and
# colour encodes kingdom. The genus name is abbreviated to keep labels short.
tempp <- sub248418_fp_all_dt %>%
  mutate(species = gsub("Faecalibacterium", "F", species)) %>%
  ggplot(aes(
    x = factor(age_days), y = reorder(species, desc(avg_age)),
    color = kingdom, size = rel_abundance
  )) +
  geom_point(alpha = 0.8) +
  # Named values pin each colour to its kingdom instead of depending on the
  # order in which the levels happen to appear.
  scale_color_manual(
    values = c(Bacteria = "orangered", Virus = "cadetblue")
  ) +
  scale_size(name = "rel.\nabundance") +
  facet_grid(vars(kingdom), scales = "free_y", space = "free") +
  theme_bw() +
  labs(y = "SGB", x = "day of life") +
  theme(
    strip.text.y.right = element_text(angle = 0),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "left"
  )

# Side panel: number of rows per species in the combined table, drawn with
# the same y ordering and facets as the bubble plot so the two line up.
detectp <- sub248418_fp_all_dt %>%
  group_by(species, kingdom, avg_age) %>%
  # .groups = "drop" returns an ungrouped table and silences the
  # "summarise() has grouped output" message.
  summarize(detected = n(), .groups = "drop") %>%
  ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
  geom_col(color = "grey10") +
  facet_grid(vars(kingdom), scales = "free_y", space = "free") +
  theme_bw() +
  labs(y = "", x = "# times\ndetected") +
  theme(
    strip.text.y.right = element_blank(),
    strip.background = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.text.y = element_blank()
  )

combp <- plot_grid(
  tempp, detectp,
  align = "h", axis = "b", rel_widths = c(20, 3)
)

combp

# Pass `filename` and `plot` by name: the previous call only worked because
# `file` partially matched `filename` and combp then fell into `plot`.
ggsave(
  filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub248418_faecali_bac_phage2.pdf",
  plot = combp,
  width = 8,
  height = 5
)

```


```{r}
# Column + line layers per species, with one facet row for every
# kingdom/species combination; strip labels wrap at 13 characters.
ggplot(
  data = sub248418_fp_all_dt,
  mapping = aes(
    x = age_days,
    y = reorder(species, avg_age),
    fill = kingdom
  )
) +
  geom_col() +
  geom_line() +
  facet_grid(
    rows = vars(kingdom, species),
    scales = "free_y",
    space = "free",
    labeller = label_wrap_gen(width = 13)
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    strip.text.y.right = element_text(angle = 0)
  )
```

subject 202376
```{r}
# Restrict the merged table to subject 202376 and derive a species label by
# stripping everything up to and including "|s__" from the lineage.
sub202376_dt <- merge_dt %>%
  filter(mask_id == 202376) %>%
  mutate(species = gsub(".*\\|s__", "", lineage))

# Faecalibacterium SGBs over time: one facet per species, ordered by the mean
# age (in days) of the rows in which the species appears.
# NOTE(review): rows added by complete() have NA lineage, so the grepl()
# filter that follows appears to drop them again and the zero-fill then has
# nothing to fill - confirm whether explicit zeros were intended.
sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Faecalibacterium", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    # prettify labels: underscores to spaces, then remove the "|t" marker
    species = gsub("_", " ", species),
    species = gsub("\\|t", "", species)
  ) %>%
  ungroup() %>%
  ggplot(aes(
    x = age_days, y = rel_abundance,
    color = reorder(species, avg_age),
    fill = reorder(species, avg_age)
  )) +
  geom_col() +
  geom_line() +
  scale_fill_brewer(palette = "Dark2") +
  scale_color_brewer(palette = "Dark2") +
  facet_wrap(
    vars(reorder(species, avg_age)),
    ncol = 1, strip.position = "right",
    labeller = label_wrap_gen(width = 13)
  ) +
  theme_linedraw() +
  # fixed x range of 0-2100 days
  xlim(c(0, 2100)) +
  theme(
    # "none" is the documented value for hiding the legend; the previous
    # "Off" is not a recognised legend position.
    legend.position = "none",
    strip.text.y.right = element_text(angle = 0)
  )

# Saves the most recent plot; `filename` is spelled out instead of relying on
# partial matching of `file`.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_faecali_bacteria1.pdf", width = 8, height = 2.8)

```

```{r}

# 15-colour continuous palette used for both fill and outline below.
# NOTE(review): more than 15 plotted species would exceed this palette --
# confirm that cannot happen for this subject.
pal <- wes_palette("FantasticFox1", 15, type = "continuous")

# Relative abundance over age_days for the species of subject 202376 that
# match the iPHoP/taxonomy table and have predicted host genus
# Faecalibacterium; one facet row per species, ordered by mean age_days.
merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Faecalibacterium") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  # NOTE(review): on grouped data complete() expands within each group, so
  # this probably adds no rows (making the zero-fill below a no-op) -- confirm.
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  ggplot(aes(x = age_days, y = rel_abundance,
             color = reorder(species, avg_age),
             fill = reorder(species, avg_age))) +
  geom_col() +
  geom_line() +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  facet_wrap(vars(reorder(species, avg_age)),
             ncol = 1, strip.position = "right",
             labeller = label_wrap_gen(width = 13)) +
  xlim(c(0, 2100)) +
  theme_bw() +
  # "none" is the documented way to hide the legend; "Off" is not a valid
  # legend.position value and is rejected by recent ggplot2 releases.
  theme(legend.position = "none",
        strip.text.y.right = element_text(angle = 0))

# No `plot` argument: ggsave() writes the last plot that was displayed.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_faecali_phage1.pdf", width = 8, height = 9.5)

```

```{r}
# Number of rows per species for subject 202376, most frequent first.
sub202376_dt %>%
  group_by(species) %>%
  summarize(observation = n()) %>%
  arrange(desc(observation)) # %>%
  # Optional genus filter, disabled together with the pipe above. Left
  # uncommented it ran as a standalone filter() call and failed because
  # `species` does not exist outside the pipeline.
  # filter(grepl("Dialister", species))
```

```{r}
# Relative abundance over age_days for every Dialister lineage seen in
# subject 202376: one facet row per species, rows ordered by the mean
# age_days of that species' rows.
sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  # NOTE(review): rows added by complete() have an NA lineage, so the grepl()
  # filter on the next line removes them again and the zero-fill below has
  # nothing to fill -- confirm whether that is intended.
  complete(species, age_days) %>%
  filter(grepl("g__Dialister", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days),
         # Display names: underscores become spaces, literal "|t" is dropped.
         species = gsub("_", " ", species),
         species = gsub("\\|t", "", species)) %>%
  ggplot(aes(x = age_days, y = rel_abundance,
             color = reorder(species, avg_age),
             fill = reorder(species, avg_age))) +
  geom_col() +
  geom_line() +
  scale_fill_brewer(palette = "Dark2") +
  scale_color_brewer(palette = "Dark2") +
  facet_wrap(vars(reorder(species, avg_age)),
             ncol = 1, strip.position = "right",
             labeller = label_wrap_gen(width = 13)) +
  theme_linedraw() +
  xlim(c(0, 2100)) +
  # "none" is the documented way to hide the legend; "Off" is not a valid
  # legend.position value and is rejected by recent ggplot2 releases.
  theme(legend.position = "none",
        strip.text.y.right = element_text(angle = 0))

# No `plot` argument: ggsave() writes the last plot that was displayed.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_dialister_bacteria1.pdf", width = 8, height = 2.8)

```

```{r}

# 15-colour continuous palette used for both fill and outline below.
# NOTE(review): more than 15 plotted species would exceed this palette --
# confirm that cannot happen for this subject.
pal <- wes_palette("FantasticFox1", 15, type = "continuous")

# Relative abundance over age_days for the species of subject 202376 that
# match the iPHoP/taxonomy table and have predicted host genus Dialister;
# one facet row per species, ordered by mean age_days.
merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Dialister") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  # NOTE(review): on grouped data complete() expands within each group, so
  # this probably adds no rows (making the zero-fill below a no-op) -- confirm.
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  ggplot(aes(x = age_days, y = rel_abundance,
             color = reorder(species, avg_age),
             fill = reorder(species, avg_age))) +
  geom_col() +
  geom_line() +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  facet_wrap(vars(reorder(species, avg_age)),
             ncol = 1, strip.position = "right",
             labeller = label_wrap_gen(width = 13)) +
  xlim(c(0, 2100)) +
  theme_bw() +
  # "none" is the documented way to hide the legend; "Off" is not a valid
  # legend.position value and is rejected by recent ggplot2 releases.
  theme(legend.position = "none",
        strip.text.y.right = element_text(angle = 0))

# No `plot` argument: ggsave() writes the last plot that was displayed.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_dialister_phage1.pdf", width = 8, height = 9.5)

```

Alistipes
```{r}
# Relative abundance over age_days for every Alistipes lineage seen in
# subject 202376: one facet row per species, rows ordered by the mean
# age_days of that species' rows.
sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  # NOTE(review): rows added by complete() have an NA lineage, so the grepl()
  # filter on the next line removes them again and the zero-fill below has
  # nothing to fill -- confirm whether that is intended.
  complete(species, age_days) %>%
  filter(grepl("g__Alistipes", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days),
         # Display names: underscores become spaces, literal "|t" is dropped.
         species = gsub("_", " ", species),
         species = gsub("\\|t", "", species)) %>%
  ggplot(aes(x = age_days, y = rel_abundance,
             color = reorder(species, avg_age),
             fill = reorder(species, avg_age))) +
  geom_col() +
  geom_line() +
  scale_fill_brewer(palette = "Dark2") +
  scale_color_brewer(palette = "Dark2") +
  facet_wrap(vars(reorder(species, avg_age)),
             ncol = 1, strip.position = "right",
             labeller = label_wrap_gen(width = 13)) +
  theme_linedraw() +
  xlim(c(0, 2100)) +
  # "none" is the documented way to hide the legend; "Off" is not a valid
  # legend.position value and is rejected by recent ggplot2 releases.
  theme(legend.position = "none",
        strip.text.y.right = element_text(angle = 0))

# No `plot` argument: ggsave() writes the last plot that was displayed.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_Alistipes_bacteria1.pdf", width = 8, height = 2.8)

```

```{r}

# 15-colour continuous palette used for both fill and outline below.
# NOTE(review): more than 15 plotted species would exceed this palette --
# confirm that cannot happen for this subject.
pal <- wes_palette("FantasticFox1", 15, type = "continuous")

# Relative abundance over age_days for the species of subject 202376 that
# match the iPHoP/taxonomy table and have predicted host genus Alistipes;
# one facet row per species, ordered by mean age_days.
merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Alistipes") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  # NOTE(review): on grouped data complete() expands within each group, so
  # this probably adds no rows (making the zero-fill below a no-op) -- confirm.
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  ggplot(aes(x = age_days, y = rel_abundance,
             color = reorder(species, avg_age),
             fill = reorder(species, avg_age))) +
  geom_col() +
  geom_line() +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  facet_wrap(vars(reorder(species, avg_age)),
             ncol = 1, strip.position = "right",
             labeller = label_wrap_gen(width = 13)) +
  xlim(c(0, 2100)) +
  theme_bw() +
  # "none" is the documented way to hide the legend; "Off" is not a valid
  # legend.position value and is rejected by recent ggplot2 releases.
  theme(legend.position = "none",
        strip.text.y.right = element_text(angle = 0))

# No `plot` argument: ggsave() writes the last plot that was displayed.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_Alistipes_phage1.pdf", width = 8, height = 9.5)

```


Roseburia
```{r}
# Relative abundance over age_days for every Roseburia lineage seen in
# subject 202376: one facet row per species, rows ordered by the mean
# age_days of that species' rows.
sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  # NOTE(review): rows added by complete() have an NA lineage, so the grepl()
  # filter on the next line removes them again and the zero-fill below has
  # nothing to fill -- confirm whether that is intended.
  complete(species, age_days) %>%
  filter(grepl("g__Roseburia", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days),
         # Display names: underscores become spaces, literal "|t" is dropped.
         species = gsub("_", " ", species),
         species = gsub("\\|t", "", species)) %>%
  ggplot(aes(x = age_days, y = rel_abundance,
             color = reorder(species, avg_age),
             fill = reorder(species, avg_age))) +
  geom_col() +
  geom_line() +
  scale_fill_brewer(palette = "Dark2") +
  scale_color_brewer(palette = "Dark2") +
  facet_wrap(vars(reorder(species, avg_age)),
             ncol = 1, strip.position = "right",
             labeller = label_wrap_gen(width = 13)) +
  theme_linedraw() +
  xlim(c(0, 2100)) +
  # "none" is the documented way to hide the legend; "Off" is not a valid
  # legend.position value and is rejected by recent ggplot2 releases.
  theme(legend.position = "none",
        strip.text.y.right = element_text(angle = 0))

# No `plot` argument: ggsave() writes the last plot that was displayed.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_Roseburia_bacteria1.pdf", width = 8, height = 2.8)

```

```{r}

# 15-colour continuous palette used for both fill and outline below.
# NOTE(review): more than 15 plotted species would exceed this palette --
# confirm that cannot happen for this subject.
pal <- wes_palette("FantasticFox1", 15, type = "continuous")

# Relative abundance over age_days for the species of subject 202376 that
# match the iPHoP/taxonomy table and have predicted host genus Roseburia;
# one facet row per species, ordered by mean age_days.
merge(sub202376_dt, iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Roseburia") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  # NOTE(review): on grouped data complete() expands within each group, so
  # this probably adds no rows (making the zero-fill below a no-op) -- confirm.
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  ggplot(aes(x = age_days, y = rel_abundance,
             color = reorder(species, avg_age),
             fill = reorder(species, avg_age))) +
  geom_col() +
  geom_line() +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  facet_wrap(vars(reorder(species, avg_age)),
             ncol = 1, strip.position = "right",
             labeller = label_wrap_gen(width = 13)) +
  xlim(c(0, 2100)) +
  theme_bw() +
  # "none" is the documented way to hide the legend; "Off" is not a valid
  # legend.position value and is rejected by recent ggplot2 releases.
  theme(legend.position = "none",
        strip.text.y.right = element_text(angle = 0))

# No `plot` argument: ggsave() writes the last plot that was displayed.
ggsave(filename = "/Users/michaeltisza/mike_tisza/sandbox/phage_bacteria_specific_subjects/sub202376_Roseburia_phage1.pdf", width = 8, height = 9.5)

```

202376 tilemap (Faecalibacterium)
```{r}
# Bacterial rows: Faecalibacterium lineages of subject 202376, with the mean
# age_days per species and a display-friendly species name.
# NOTE(review): rows added by complete() have an NA lineage and are dropped by
# the grepl() filter that follows -- confirm the zero-fill is meant to be inert.
bac_fp_sub202376 <- sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Faecalibacterium", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    species = gsub("\\|t", "", gsub("_", " ", species))
  ) %>%
  ungroup() %>%
  mutate(kingdom = "Bacteria")

# Viral rows: species matched in the iPHoP/taxonomy table whose predicted
# host genus is Faecalibacterium.
phage_fp_sub202376 <- sub202376_dt %>%
  merge(iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Faecalibacterium") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  mutate(kingdom = "Virus")

# Stack both kingdoms into the table used by the tilemap chunk.
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)

```

```{r}
# Bubble panel: one point per species and sampling day, sized by relative
# abundance and coloured by kingdom; genus name abbreviated to "F".
tempp <- sub202376_fp_all_dt %>%
  mutate(species = gsub("Faecalibacterium", "F", species)) %>%
  ggplot(
    mapping = aes(
      x = factor(age_days),
      y = reorder(species, desc(avg_age)),
      color = kingdom,
      size = rel_abundance
    )
  ) +
  geom_point(alpha = 0.8) +
  scale_color_manual(values = c("orangered", "cadetblue")) +
  scale_size(name = "rel.\nabundance") +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "day of life", y = "SGB") +
  theme_bw() +
  theme(
    legend.position = "left",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    strip.text.y.right = element_text(angle = 0)
  )

# Bar panel: number of rows per species, same y ordering as the bubble panel.
detectp <- sub202376_fp_all_dt %>%
  group_by(species, kingdom, avg_age) %>%
  summarise(detected = n()) %>%
  ggplot(mapping = aes(x = detected, y = reorder(species, desc(avg_age)))) +
  geom_col() +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "# times\ndetected", y = "") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    strip.text.y.right = element_blank()
  )

# Side-by-side layout; the bubble panel takes most of the width.
combp <- plot_grid(
  tempp, detectp,
  rel_widths = c(20, 3),
  align = "h",
  axis = "b"
)

combp
```


check most prevalent bacteria with few other SGBs in genus
```{r}
# For each bacterial species of subject 202376: its genus, its number of rows
# (`detected`), and how many distinct species share that genus
# (`distinct_g`). Genera with the fewest species are listed first.
sub202376_dt %>%
  filter(kingdom == "Bacteria") %>%
  # Strip everything from "|s__" onward, then everything up to "|g__".
  mutate(genus = gsub(".*\\|g__", "", gsub("\\|s__.*", "", lineage))) %>%
  group_by(species) %>%
  mutate(detected = n()) %>%
  # Regrouping replaces the species grouping, so no ungroup() is needed here.
  group_by(genus) %>%
  mutate(distinct_g = n_distinct(species)) %>%
  ungroup() %>%
  distinct(genus, species, detected, distinct_g) %>%
  arrange(distinct_g, desc(detected))
```

202376 tilemap (Flavonifractor)
```{r}
# Bacterial rows: Flavonifractor lineages of subject 202376, with the mean
# age_days per species and a display-friendly species name.
# NOTE(review): rows added by complete() have an NA lineage and are dropped by
# the grepl() filter that follows -- confirm the zero-fill is meant to be inert.
bac_fp_sub202376 <- sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Flavonifractor", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    species = gsub("\\|t", "", gsub("_", " ", species))
  ) %>%
  ungroup() %>%
  mutate(kingdom = "Bacteria")

# Viral rows: species matched in the iPHoP/taxonomy table whose predicted
# host genus is Flavonifractor.
phage_fp_sub202376 <- sub202376_dt %>%
  merge(iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Flavonifractor") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  mutate(kingdom = "Virus")

# Stack both kingdoms into the table used by the tilemap chunk.
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)

```

```{r}
# Bubble panel: one point per species and sampling day, sized by relative
# abundance and coloured by kingdom; genus name abbreviated to "F".
tempp <- sub202376_fp_all_dt %>%
  mutate(species = gsub("Flavonifractor", "F", species)) %>%
  ggplot(
    mapping = aes(
      x = factor(age_days),
      y = reorder(species, desc(avg_age)),
      color = kingdom,
      size = rel_abundance
    )
  ) +
  geom_point(alpha = 0.8) +
  scale_color_manual(values = c("orangered", "cadetblue")) +
  scale_size(name = "rel.\nabundance") +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "day of life", y = "SGB") +
  theme_bw() +
  theme(
    legend.position = "left",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    strip.text.y.right = element_text(angle = 0)
  )

# Bar panel: number of rows per species, same y ordering as the bubble panel.
detectp <- sub202376_fp_all_dt %>%
  group_by(species, kingdom, avg_age) %>%
  summarise(detected = n()) %>%
  ggplot(mapping = aes(x = detected, y = reorder(species, desc(avg_age)))) +
  geom_col() +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "# times\ndetected", y = "") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    strip.text.y.right = element_blank()
  )

# Side-by-side layout; the bubble panel takes most of the width.
combp <- plot_grid(
  tempp, detectp,
  rel_widths = c(20, 3),
  align = "h",
  axis = "b"
)

combp
```

202376 tilemap (Escherichia)
```{r}
# Bacterial rows: Escherichia lineages of subject 202376, with the mean
# age_days per species and a display-friendly species name.
# NOTE(review): rows added by complete() have an NA lineage and are dropped by
# the grepl() filter that follows -- confirm the zero-fill is meant to be inert.
bac_fp_sub202376 <- sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Escherichia", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    species = gsub("\\|t", "", gsub("_", " ", species))
  ) %>%
  ungroup() %>%
  mutate(kingdom = "Bacteria")

# Viral rows: species matched in the iPHoP/taxonomy table whose predicted
# host genus is Escherichia.
phage_fp_sub202376 <- sub202376_dt %>%
  merge(iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Escherichia") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  mutate(kingdom = "Virus")

# Stack both kingdoms into the table used by the tilemap chunk.
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)

```

```{r}
# Bubble panel: one point per species and sampling day, sized by relative
# abundance and coloured by kingdom; genus name abbreviated to "E".
tempp <- sub202376_fp_all_dt %>%
  mutate(species = gsub("Escherichia", "E", species)) %>%
  ggplot(
    mapping = aes(
      x = factor(age_days),
      y = reorder(species, desc(avg_age)),
      color = kingdom,
      size = rel_abundance
    )
  ) +
  geom_point(alpha = 0.8) +
  scale_color_manual(values = c("orangered", "cadetblue")) +
  scale_size(name = "rel.\nabundance") +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "day of life", y = "SGB") +
  theme_bw() +
  theme(
    legend.position = "left",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    strip.text.y.right = element_text(angle = 0)
  )

# Bar panel: number of rows per species, same y ordering as the bubble panel.
detectp <- sub202376_fp_all_dt %>%
  group_by(species, kingdom, avg_age) %>%
  summarise(detected = n()) %>%
  ggplot(mapping = aes(x = detected, y = reorder(species, desc(avg_age)))) +
  geom_col() +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "# times\ndetected", y = "") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    strip.text.y.right = element_blank()
  )

# Side-by-side layout; the bubble panel takes most of the width.
combp <- plot_grid(
  tempp, detectp,
  rel_widths = c(20, 3),
  align = "h",
  axis = "b"
)

combp
```

202376 tilemap (Barnesiella)
```{r}
# Bacterial rows: Barnesiella lineages of subject 202376, with the mean
# age_days per species and a display-friendly species name.
# NOTE(review): rows added by complete() have an NA lineage and are dropped by
# the grepl() filter that follows -- confirm the zero-fill is meant to be inert.
bac_fp_sub202376 <- sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Barnesiella", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    species = gsub("\\|t", "", gsub("_", " ", species))
  ) %>%
  ungroup() %>%
  mutate(kingdom = "Bacteria")

# Viral rows: species matched in the iPHoP/taxonomy table whose predicted
# host genus is Barnesiella.
phage_fp_sub202376 <- sub202376_dt %>%
  merge(iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Barnesiella") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  mutate(kingdom = "Virus")

# Stack both kingdoms into the table used by the tilemap chunk.
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)

```

```{r}
# Bubble panel: one point per species and sampling day, sized by relative
# abundance and coloured by kingdom; genus name abbreviated to "B".
tempp <- sub202376_fp_all_dt %>%
  mutate(species = gsub("Barnesiella", "B", species)) %>%
  ggplot(
    mapping = aes(
      x = factor(age_days),
      y = reorder(species, desc(avg_age)),
      color = kingdom,
      size = rel_abundance
    )
  ) +
  geom_point(alpha = 0.8) +
  scale_color_manual(values = c("orangered", "cadetblue")) +
  scale_size(name = "rel.\nabundance") +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "day of life", y = "SGB") +
  theme_bw() +
  theme(
    legend.position = "left",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    strip.text.y.right = element_text(angle = 0)
  )

# Bar panel: number of rows per species, same y ordering as the bubble panel.
detectp <- sub202376_fp_all_dt %>%
  group_by(species, kingdom, avg_age) %>%
  summarise(detected = n()) %>%
  ggplot(mapping = aes(x = detected, y = reorder(species, desc(avg_age)))) +
  geom_col() +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "# times\ndetected", y = "") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    strip.text.y.right = element_blank()
  )

# Side-by-side layout; the bubble panel takes most of the width.
combp <- plot_grid(
  tempp, detectp,
  rel_widths = c(20, 3),
  align = "h",
  axis = "b"
)

combp
```

Parabacteroides
202376 tilemap
```{r}
# Bacterial rows: Parabacteroides lineages of subject 202376, with the mean
# age_days per species and a display-friendly species name.
# NOTE(review): rows added by complete() have an NA lineage and are dropped by
# the grepl() filter that follows -- confirm the zero-fill is meant to be inert.
bac_fp_sub202376 <- sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Parabacteroides", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    species = gsub("\\|t", "", gsub("_", " ", species))
  ) %>%
  ungroup() %>%
  mutate(kingdom = "Bacteria")

# Viral rows: species matched in the iPHoP/taxonomy table whose predicted
# host genus is Parabacteroides.
phage_fp_sub202376 <- sub202376_dt %>%
  merge(iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Parabacteroides") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  mutate(kingdom = "Virus")

# Stack both kingdoms into the table used by the tilemap chunk.
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)

```

```{r}
# Bubble panel: one point per species and sampling day, sized by relative
# abundance and coloured by kingdom; genus name abbreviated to "P".
tempp <- sub202376_fp_all_dt %>%
  mutate(species = gsub("Parabacteroides", "P", species)) %>%
  ggplot(
    mapping = aes(
      x = factor(age_days),
      y = reorder(species, desc(avg_age)),
      color = kingdom,
      size = rel_abundance
    )
  ) +
  geom_point(alpha = 0.8) +
  scale_color_manual(values = c("orangered", "cadetblue")) +
  scale_size(name = "rel.\nabundance") +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "day of life", y = "SGB") +
  theme_bw() +
  theme(
    legend.position = "left",
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    strip.text.y.right = element_text(angle = 0)
  )

# Bar panel: number of rows per species, same y ordering as the bubble panel.
detectp <- sub202376_fp_all_dt %>%
  group_by(species, kingdom, avg_age) %>%
  summarise(detected = n()) %>%
  ggplot(mapping = aes(x = detected, y = reorder(species, desc(avg_age)))) +
  geom_col() +
  facet_grid(rows = vars(kingdom), space = "free", scales = "free_y") +
  labs(x = "# times\ndetected", y = "") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.text.y = element_blank(),
    strip.background = element_blank(),
    strip.text.y.right = element_blank()
  )

# Side-by-side layout; the bubble panel takes most of the width.
combp <- plot_grid(
  tempp, detectp,
  rel_widths = c(20, 3),
  align = "h",
  axis = "b"
)

combp
```


Blautia
202376 tilemap
```{r}
# Bacterial rows: Blautia lineages of subject 202376, with the mean age_days
# per species and a display-friendly species name.
# NOTE(review): rows added by complete() have an NA lineage and are dropped by
# the grepl() filter that follows -- confirm the zero-fill is meant to be inert.
bac_fp_sub202376 <- sub202376_dt %>%
  select(lineage, species, age_days, sampleID, rel_abundance) %>%
  complete(species, age_days) %>%
  filter(grepl("g__Blautia", lineage)) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  group_by(species) %>%
  mutate(
    avg_age = mean(age_days),
    species = gsub("\\|t", "", gsub("_", " ", species))
  ) %>%
  ungroup() %>%
  mutate(kingdom = "Bacteria")

# Viral rows: species matched in the iPHoP/taxonomy table whose predicted
# host genus is Blautia.
phage_fp_sub202376 <- sub202376_dt %>%
  merge(iphop_tax_dt, by = "species") %>%
  filter(host_genus == "g__Blautia") %>%
  group_by(species) %>%
  mutate(avg_age = mean(age_days)) %>%
  complete(age_days) %>%
  select(lineage, species, age_days, sampleID, rel_abundance, avg_age) %>%
  mutate(rel_abundance = ifelse(is.na(rel_abundance), 0, rel_abundance)) %>%
  ungroup() %>%
  mutate(kingdom = "Virus")

# Stack both kingdoms into the table used by the tilemap chunk.
sub202376_fp_all_dt <- rbind(bac_fp_sub202376, phage_fp_sub202376)

```

```{r}
# Bubble panel: one point per species and sampling day, sized by relative
# abundance and coloured by kingdom.
tempp <- sub202376_fp_all_dt %>%
  # Abbreviate the genus that the preceding chunk filtered on (Blautia). The
  # previous pattern, "Veillonella", was left over from another genus and
  # never matched, so the labels were not shortened.
  mutate(species = gsub("Blautia", "B", species)) %>%
  ggplot(aes(x = factor(age_days), y = reorder(species, desc(avg_age)),
             color = kingdom, size = rel_abundance)) +
  geom_point(alpha = 0.8) +
  scale_color_manual(values = c("orangered", "cadetblue")) +
  scale_size(name = "rel.\nabundance") +
  facet_grid(vars(kingdom), scales = "free_y", space = "free") +
  theme_bw() +
  labs(y = "SGB", x = "day of life") +
  theme(strip.text.y.right = element_text(angle = 0),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        legend.position = "left")

# Bar panel: number of rows per species, ordered like the bubble panel.
detectp <- sub202376_fp_all_dt %>%
  group_by(species, kingdom, avg_age) %>%
  summarize(detected = n()) %>%
  ggplot(aes(x = detected, y = reorder(species, desc(avg_age)))) +
  geom_col() +
  facet_grid(vars(kingdom), scales = "free_y", space = "free") +
  theme_bw() +
  labs(y = "", x = "# times\ndetected") +
  # Strip labels and y-axis text are blanked in this panel.
  theme(strip.text.y.right = element_blank(),
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        axis.text.y = element_blank())

# Side-by-side layout; the bubble panel takes most of the width.
combp <- plot_grid(tempp, detectp, align = "h", axis = "b", rel_widths = c(20, 3))

combp
```

